
from llama_index.readers.web import SimpleWebPageReader
# Pages to fetch; a single URL is requested here.
page_urls = ["https://www.usian.cn/"]

# html_to_text=True is passed to the reader; judging by the option name this
# requests text output rather than raw HTML -- confirm against the reader docs.
reader = SimpleWebPageReader(html_to_text=True)
documents = reader.load_data(page_urls)

# Print the text of the first returned document.
first_document = documents[0]
print(first_document.text)
